{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "2e8a0fac-adbd-4428-90ea-869aee2b95bf",
   "metadata": {},
   "source": [
    "## Xview Dataset clipping\n",
    "This component clips the GeoTIFF images of the xView dataset into smaller tiles using a sliding window and uploads the tiles to S3-compatible storage."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c0421aaf-4da4-4a66-b626-e8962315afe8",
   "metadata": {},
   "outputs": [],
   "source": [
    "#!pip install Pillow claimed aiobotocore botocore s3fs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0b7ad533-dabc-488e-bf65-59a54a85f3f9",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "import os\n",
    "from PIL import Image\n",
    "from c3 import operator_utils\n",
    "import s3fs\n",
    "import pathlib\n",
    "import math\n",
    "from PIL import Image, TiffTags, TiffImagePlugin\n",
    "import imageio\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c8df047b-6167-43f1-9a31-d836580180ac",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "#source is the path to the folder with the unzipped .tif images from xview dataset \n",
    "source = os.environ.get(\"directory_path\")\n",
    "\n",
    "#destination is the path to the folder which saves all the extracted tiles. \n",
    "destination = os.environ.get(\"destination\")\n",
    "\n",
    "#tile_size_x and tile_size_y are the width and height, in pixels, of the rectangular window used to crop each image\n",
    "tile_size_x = int(os.environ.get(\"tile_size_x\", 64))\n",
    "tile_size_y = int(os.environ.get(\"tile_size_y\", 64))\n",
    "\n",
    "#stride_x is the number of pixels the sliding window is moved to the right after each step\n",
    "#For a tumbling (non-overlapping) window, stride_x must equal tile_size_x and stride_y must equal tile_size_y\n",
    "stride_x = int(os.environ.get(\"stride_x\", 32))\n",
    "#stride_y is the length in pixels the sliding window is moved down after completing a row\n",
    "stride_y = int(os.environ.get(\"stride_y\", 32))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9a2a67d1-c8a4-49f7-b0bf-8710e27c8387",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Each connection string is unpacked into four values; judging by the target names\n",
    "# these are key id, secret, endpoint and the remaining path (TODO confirm against c3).\n",
    "# Note that `source` and `destination` are overwritten with the fourth value.\n",
    "access_key_id_source, secret_access_key_source, endpoint_source, source = (\n",
    "    operator_utils.explode_connection_string(source)\n",
    ")\n",
    "access_key_id_destination, secret_access_key_destination, endpoint_destination, destination = (\n",
    "    operator_utils.explode_connection_string(destination)\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "12241cb1-1d84-48a6-9f9e-ba534822a661",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Authenticated filesystem handle used to list and download the source images.\n",
    "source_client_kwargs = {'endpoint_url': endpoint_source}\n",
    "s3source = s3fs.S3FileSystem(\n",
    "    key=access_key_id_source,\n",
    "    secret=secret_access_key_source,\n",
    "    anon=False,\n",
    "    client_kwargs=source_client_kwargs,\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "24926242-b0ec-46cd-bbf3-4da1dbd989e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Authenticated filesystem handle used to upload the generated tiles.\n",
    "destination_client_kwargs = {'endpoint_url': endpoint_destination}\n",
    "s3destination = s3fs.S3FileSystem(\n",
    "    key=access_key_id_destination,\n",
    "    secret=secret_access_key_destination,\n",
    "    anon=False,\n",
    "    client_kwargs=destination_client_kwargs,\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e8370f73-dcef-4865-ab70-fd0a0891ab71",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Tag ids taken from the TIFF 6.0 / GeoTIFF specifications.\n",
    "MODEL_PIXEL_SCALE_TAG = 33550  # GeoTIFF ModelPixelScaleTag: (scale_x, scale_y, scale_z)\n",
    "MODEL_TIEPOINT_TAG = 33922  # GeoTIFF ModelTiepointTag: (i, j, k, x, y, z)\n",
    "# NOTE(review): 339 is SampleFormat in the TIFF tag registry; it is left out of the\n",
    "# copied tags exactly as before - confirm that this exclusion is intended.\n",
    "SAMPLE_FORMAT_TAG = 339\n",
    "TEMP_TILE_PATH = \"temporary_exchange.tif\"\n",
    "\n",
    "# For every object listed under `source`: download it, cut it into tiles with a sliding\n",
    "# window, write each tile as a deflate-compressed TIFF carrying the original tags plus an\n",
    "# updated tiepoint, and upload the tile to `destination`.\n",
    "for item in s3source.ls(source):\n",
    "    # The local copy is stored under the same relative path as the remote key.\n",
    "    s3source.get(item, item)\n",
    "    _, file_extension = os.path.splitext(item)\n",
    "    clipped_item = os.path.join(destination, item)\n",
    "\n",
    "    # The context manager closes the source file once all of its tiles are written.\n",
    "    with Image.open(item) as image:\n",
    "        width, height = image.size\n",
    "\n",
    "        # Upper-left corners of all windows that fit completely into the image.\n",
    "        # The '+ 1' keeps the last window that ends exactly on the image border;\n",
    "        # max(..., 0) still yields one window at 0 for images smaller than a tile.\n",
    "        x_range = list(range(0, max(width - tile_size_x, 0) + 1, stride_x))\n",
    "        y_range = list(range(0, max(height - tile_size_y, 0) + 1, stride_y))\n",
    "\n",
    "        og_metadata = image.tag_v2\n",
    "        tiepoint = og_metadata[MODEL_TIEPOINT_TAG]\n",
    "        pixel_scale = og_metadata[MODEL_PIXEL_SCALE_TAG]\n",
    "        tie_col, tie_row = tiepoint[0], tiepoint[1]\n",
    "        initial_lon, initial_lat = tiepoint[3], tiepoint[4]\n",
    "        pixel_width_deg, pixel_height_deg = pixel_scale[0], pixel_scale[1]\n",
    "\n",
    "        for x in x_range:\n",
    "            for y in y_range:\n",
    "                # GeoTIFF convention: model X grows with the raster column, while model Y\n",
    "                # shrinks as the raster row grows, hence the subtraction for the latitude.\n",
    "                upper_left_lon = initial_lon + (x - tie_col) * pixel_width_deg\n",
    "                upper_left_lat = initial_lat - (y - tie_row) * pixel_height_deg\n",
    "\n",
    "                # The tile is written once and re-opened; presumably this is done so that\n",
    "                # tag_v2 lists the tags Pillow already produced for it (TODO confirm).\n",
    "                image.crop((x, y, x + tile_size_x, y + tile_size_y)).save(TEMP_TILE_PATH)\n",
    "\n",
    "                with Image.open(TEMP_TILE_PATH) as cropped:\n",
    "                    available_metadata = cropped.tag_v2\n",
    "\n",
    "                    # Copy every original tag the tile does not already carry, except the\n",
    "                    # tiepoint (replaced below) and SampleFormat.\n",
    "                    tiffinfo = TiffImagePlugin.ImageFileDirectory_v2()\n",
    "                    for tag, value in og_metadata.items():\n",
    "                        if tag in available_metadata or tag in (SAMPLE_FORMAT_TAG, MODEL_TIEPOINT_TAG):\n",
    "                            continue\n",
    "                        tiffinfo[tag] = value\n",
    "\n",
    "                    # New tiepoint: raster origin of the tile mapped to its own upper-left corner.\n",
    "                    tiffinfo[MODEL_TIEPOINT_TAG] = (0.0, 0.0, 0.0, upper_left_lon, upper_left_lat, 0.0)\n",
    "\n",
    "                    dest_path = f'{clipped_item}.{x}.{y}{file_extension}'\n",
    "                    pathlib.Path(os.path.dirname(dest_path)).mkdir(parents=True, exist_ok=True)\n",
    "\n",
    "                    cropped.save(dest_path, tiffinfo=tiffinfo, save_all=True, compression=\"tiff_deflate\")\n",
    "\n",
    "                clipped_item_upload = os.path.join(destination, os.path.basename(dest_path))\n",
    "                s3destination.put(dest_path, clipped_item_upload)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
